********************************************************************************************;
****** This file pulls and merges the datasets necessary for the analyses in ***************;
****** Dettling, Goodman, and Smith (2017).                                  ***************;
********************************************************************************************;


***Before running, you need the following SAS datasets from College Board:;
*All_wk_XXXX (where XXXX = year) - PSAT and SAT takers between 2000 and 2008;
*asc_2011 - Crosswalk between DI codes (score send codes) and IPEDS' unitids;


*Creating directories;
libname CB_raw "\\Info for Replication\Data\Raw Data\College Board Data";
libname Other_raw "\\Info for Replication\Data\Raw Data\Other Data";
libname Processed "\\Info for Replication\Data\Processed Data";


**** Grabbing all PSAT and SAT takers from the high school classes of 2001 and 2008 (and separately 1996-1998);

%macro psat_sat(year=);

data all_wk_&year;
	set CB_raw.all_wk_&year;
	if max(psat_taker,sat_taker) = 1;
	satrmap = satmap;
	year = &year;
	keep STATESAT SATVRECN SATMRECN year DINUM FATHEDUC MOTHEDUC INCOME INCOME08 zip DI01--DI30 sat_taker PSAT_TAKER p_ethnic p_sex p_cgpa 
	PSATVRCN PSATMRCN PSATW pn_sr_vrcn--pn_fr_w p_aicode;
run;

%mend psat_sat;


%psat_sat (year= 1996);
%psat_sat (year= 1997);
%psat_sat (year= 1998);

%psat_sat (year= 2001);
%psat_sat (year= 2002);
%psat_sat (year= 2003);
%psat_sat (year= 2004);
%psat_sat (year= 2005);
%psat_sat (year= 2006);
%psat_sat (year= 2007);
%psat_sat (year= 2008);


*Appending 2001-2008 into one dataset and doing some data work;
data broadband;
	set all_wk_20: ;

	*To create the 1996-1998 dataset, comment out previous line of code and uncomment next line) to just use the 1996-1998 College Board data;
	*set all_wk_19: ;


	if p_ethnic ~="" and p_ethnic > 0;

	zip = p_zip5*1;
	*Must have a valid zip code;
	if zip ~=.;

	*Income variable in 2008 changed;
	if year = 2008 then income = income08;


	*Creating PSAT variable that is last PSAT score;
	if PSATVRCN =. then PSATVRCN = pn_sr_vrcn;
	if PSATVRCN =. then PSATVRCN = pn_jr_vrcn;
	if PSATVRCN =. then PSATVRCN = pn_so_vrcn;
	if PSATVRCN =. then PSATVRCN = pn_fr_vrcn;

	if PSATMRCN =. then PSATMRCN = pn_sr_mrcn;
	if PSATMRCN =. then PSATMRCN = pn_jr_mrcn;
	if PSATMRCN =. then PSATMRCN = pn_so_mrcn;
	if PSATMRCN =. then PSATMRCN = pn_fr_mrcn;

	if PSATW =. then PSATW = pn_sr_w;
	if PSATW =. then PSATW = pn_jr_w;
	if PSATW =. then PSATW = pn_so_w;
	if PSATW =. then PSATW = pn_fr_w;

	drop pn_sr_vrcn--pn_fr_w income08;

run;


*Bringing in states, fips codes, lats and longs by zip code;
data zip_codes;
	set Other_raw.Zipcodedownload;
	zip = ZIPCode;
	statefip = StateFIPS;
	keep zip StateAbbr statefip latitude longitude;
run;


proc sort data = broadband;
	by zip;
run;

proc sort data = zip_codes;
	by zip;
run;

*Merging in states, fips codes, lats and longs;
data broadband;
	merge broadband (in = a) zip_codes;	
	by zip;
	if a;

	*Only using students in 50 states and DC;
	if statefip < 60;
	if statefip~=.;
run;



*Bringing in crosswalk between DI codes (score send codes) and ipeds unitids;
Data asc_2011;
	set di_code.asc_2011;
run;


proc sort data=asc_2011;
	by cb_code;
run;


*Creating 30 crosswalks for each possible score sends per student and merging in IPEDS unitid to the 30 DI variables;
%macro number(num=);
Data ipeds&num;
	set asc_2011;
	rename cb_code= di&num ;
	rename unitid=unitid&num;
run;

PROC SORT DATA = broadband;
	by DI&NUM;
RUN;

data broadband;
	merge broadband (in = a) ipeds&num ;
	by DI&NUM;
	if a;
RUN;

%mend number;

%number (num=01);
%number (num=02);
%number (num=03);
%number (num=04);
%number (num=05);
%number (num=06);
%number (num=07);
%number (num=08);
%number (num=09);
%number (num=10);
%number (num=11);
%number (num=12);
%number (num=13);
%number (num=14);
%number (num=15);
%number (num=16);
%number (num=17);
%number (num=18);
%number (num=19);
%number (num=20);
%number (num=21);
%number (num=22);
%number (num=23);
%number (num=24);
%number (num=25);
%number (num=26);
%number (num=27);
%number (num=28);
%number (num=29);
%number (num=30);

*Dropping old variables;
data broadband;
	set broadband;
	drop DI01-DI30;
run;


*Bringing in ipeds lats and longs;
Data lats_longs;
	set Other_raw.ipeds_info;
	keep unitid latitude longitud;
run;

proc sort data=lats_longs;
	by unitid;
run;


*Merging in ipeds lats/longs to each score send (by IPEDS unitid);
%macro number(num=);

Data lats_longs&num;
	set lats_longs;
	rename unitid = unitid&num;
	rename latitude = lat&num;
	rename longitud = long&num;
run;

PROC SORT DATA = broadband;
	by unitid&NUM;
RUN;

data broadband;
	merge broadband (in = a) lats_longs&num ;
	by unitid&NUM;
	if a;
	dist&num = geodist(latitude,longitude,lat&num,long&num,'M') ;
	drop lat&num long&num;
RUN;

%mend number;



%number (num=01);
%number (num=02);
%number (num=03);
%number (num=04);
%number (num=05);
%number (num=06);
%number (num=07);
%number (num=08);
%number (num=09);
%number (num=10);
%number (num=11);
%number (num=12);
%number (num=13);
%number (num=14);
%number (num=15);
%number (num=16);
%number (num=17);
%number (num=18);
%number (num=19);
%number (num=20);
%number (num=21);
%number (num=22);
%number (num=23);
%number (num=24);
%number (num=25);
%number (num=26);
%number (num=27);
%number (num=28);
%number (num=29);
%number (num=30);


*Calculating some statistics regarding distance of score sends in portfolio;
data broadband;
	set broadband;
	mean_dist = mean(dist01,dist02,dist03,dist04,dist05,dist06,dist07,dist08,dist09,dist10,dist11,dist12,dist13,dist14,dist15,dist16,dist17,dist18,dist19,dist20,dist21,dist22,dist23,dist24,dist25,dist26,dist27,dist28,dist29,dist30);
	min_dist = min(dist01,dist02,dist03,dist04,dist05,dist06,dist07,dist08,dist09,dist10,dist11,dist12,dist13,dist14,dist15,dist16,dist17,dist18,dist19,dist20,dist21,dist22,dist23,dist24,dist25,dist26,dist27,dist28,dist29,dist30);
	max_dist = max(dist01,dist02,dist03,dist04,dist05,dist06,dist07,dist08,dist09,dist10,dist11,dist12,dist13,dist14,dist15,dist16,dist17,dist18,dist19,dist20,dist21,dist22,dist23,dist24,dist25,dist26,dist27,dist28,dist29,dist30);
	ct_dist = n(dist01,dist02,dist03,dist04,dist05,dist06,dist07,dist08,dist09,dist10,dist11,dist12,dist13,dist14,dist15,dist16,dist17,dist18,dist19,dist20,dist21,dist22,dist23,dist24,dist25,dist26,dist27,dist28,dist29,dist30);
	sd_dist = std(dist01,dist02,dist03,dist04,dist05,dist06,dist07,dist08,dist09,dist10,dist11,dist12,dist13,dist14,dist15,dist16,dist17,dist18,dist19,dist20,dist21,dist22,dist23,dist24,dist25,dist26,dist27,dist28,dist29,dist30);
	drop dist01--dist30;
run;



*Bringing in ipeds state, whether four-year and public;
data ipeds_state;
	set Other_raw.ipeds_info;
	fips = fips12;
	if level12 = 1 then four = 1; else four = 0;
	if control12 = 1 then public = 1; else public = 0;
	keep unitid four public fips;
run;


*Bringing in average SAT by year;
data ipeds_sat;
	set Other_raw.ipeds_avg_sat_scores;
run;


*Bringing in whether liberal arts college;
data ipeds_liberal_arts;
	set Other_raw.ipeds_liberal_arts;
run;

*Bringing in whether flagship;
data ipeds_flagship;
	set Other_raw.ipeds_flagship;
run;

proc sort data=ipeds_sat;
	by unitid;
run;

proc sort data=ipeds_four;
	by unitid;
run;

proc sort data=ipeds_state;
	by unitid;
run;

proc sort data=ipeds_liberal_arts;
	by unitid;
run;

proc sort data=ipeds_flagship;
	by unitid;
run;

*Merging all the IPEDS info together;
data ipeds_sat;
	merge ipeds_sat (in=a) ipeds_four ipeds_state ipeds_liberal_arts ipeds_flagship;
	by unitid;
	if a;
	if liberal_arts =. then liberal_arts = 0;
	if flagship =. then flagship = 0;
run;




*Merging in ipeds info to PSAT/SAT dataset;
%macro number(num=);
Data ipeds_sats&num;
	set ipeds_sat;
	rename unitid = unitid&num;
	rename sat = avg_sat&num;
	rename four = four&num;
	rename fips = fips&num;
	rename public = public&num;
	rename liberal_arts = liberal_arts&num;
	rename flagship = flagship&num;
run;

PROC SORT DATA = broadband;
	by unitid&NUM year;
RUN;

data broadband;
	merge broadband (in = a) ipeds_sats&num ;
	by unitid&NUM year;
	if a;
RUN;
%mend number;



%number (num=01);
%number (num=02);
%number (num=03);
%number (num=04);
%number (num=05);
%number (num=06);
%number (num=07);
%number (num=08);
%number (num=09);
%number (num=10);
%number (num=11);
%number (num=12);
%number (num=13);
%number (num=14);
%number (num=15);
%number (num=16);
%number (num=17);
%number (num=18);
%number (num=19);
%number (num=20);
%number (num=21);
%number (num=22);
%number (num=23);
%number (num=24);
%number (num=25);
%number (num=26);
%number (num=27);
%number (num=28);
%number (num=29);
%number (num=30);


*Creating some variables;
data broadband;
	set broadband;

*Student SAT;
Stud_sat = (SATVRECN + SATMRECN)*10;

*Whether each score send is academic match;
Match01 = avg_sat01 - stud_sat;
Match02 = avg_sat02 - stud_sat;
Match03 = avg_sat03 - stud_sat;
Match04 = avg_sat04 - stud_sat;
Match05 = avg_sat05 - stud_sat;
Match06 = avg_sat06 - stud_sat;
Match07 = avg_sat07 - stud_sat;
Match08 = avg_sat08 - stud_sat;
Match09 = avg_sat09 - stud_sat;
Match10 = avg_sat10 - stud_sat;
Match11 = avg_sat11 - stud_sat;
Match12 = avg_sat12 - stud_sat;
Match13 = avg_sat13 - stud_sat;
Match14 = avg_sat14 - stud_sat;
Match15 = avg_sat15 - stud_sat;
Match16 = avg_sat16 - stud_sat;
Match17 = avg_sat17 - stud_sat;
Match18 = avg_sat18 - stud_sat;
Match19 = avg_sat19 - stud_sat;
Match20 = avg_sat20 - stud_sat;
Match21 = avg_sat21 - stud_sat;
Match22 = avg_sat22 - stud_sat;
Match23 = avg_sat23 - stud_sat;
Match24 = avg_sat24 - stud_sat;
Match25 = avg_sat25 - stud_sat;
Match26 = avg_sat26 - stud_sat;
Match27 = avg_sat27 - stud_sat;
Match28 = avg_sat28 - stud_sat;
Match29 = avg_sat29 - stud_sat;
Match30 = avg_sat30 - stud_sat;
if match01 >= 0 then match_ind01=1;
if match02 >= 0 then match_ind02=1;
if match03 >= 0 then match_ind03=1;
if match04 >= 0 then match_ind04=1;
if match05 >= 0 then match_ind05=1;
if match06 >= 0 then match_ind06=1;
if match07 >= 0 then match_ind07=1;
if match08 >= 0 then match_ind08=1;
if match09 >= 0 then match_ind09=1;
if match10 >= 0 then match_ind10=1;
if match11 >= 0 then match_ind11=1;
if match12 >= 0 then match_ind12=1;
if match13 >= 0 then match_ind13=1;
if match14 >= 0 then match_ind14=1;
if match15 >= 0 then match_ind15=1;
if match16 >= 0 then match_ind16=1;
if match17 >= 0 then match_ind17=1;
if match18 >= 0 then match_ind18=1;
if match19 >= 0 then match_ind19=1;
if match20 >= 0 then match_ind20=1;
if match21 >= 0 then match_ind21=1;
if match22 >= 0 then match_ind22=1;
if match23 >= 0 then match_ind23=1;
if match24 >= 0 then match_ind24=1;
if match25 >= 0 then match_ind25=1;
if match26 >= 0 then match_ind26=1;
if match27 >= 0 then match_ind27=1;
if match28 >= 0 then match_ind28=1;
if match29 >= 0 then match_ind29=1;
if match30 >= 0 then match_ind30=1;

if avg_sat01 >= 1300 then match_ind01=1;
if avg_sat02 >= 1300 then match_ind02=1;
if avg_sat03 >= 1300 then match_ind03=1;
if avg_sat04 >= 1300 then match_ind04=1;
if avg_sat05 >= 1300 then match_ind05=1;
if avg_sat06 >= 1300 then match_ind06=1;
if avg_sat07 >= 1300 then match_ind07=1;
if avg_sat08 >= 1300 then match_ind08=1;
if avg_sat09 >= 1300 then match_ind09=1;
if avg_sat10 >= 1300 then match_ind10=1;
if avg_sat11 >= 1300 then match_ind11=1;
if avg_sat12 >= 1300 then match_ind12=1;
if avg_sat13 >= 1300 then match_ind13=1;
if avg_sat14 >= 1300 then match_ind14=1;
if avg_sat15 >= 1300 then match_ind15=1;
if avg_sat16 >= 1300 then match_ind16=1;
if avg_sat17 >= 1300 then match_ind17=1;
if avg_sat18 >= 1300 then match_ind18=1;
if avg_sat19 >= 1300 then match_ind19=1;
if avg_sat20 >= 1300 then match_ind20=1;
if avg_sat21 >= 1300 then match_ind21=1;
if avg_sat22 >= 1300 then match_ind22=1;
if avg_sat23 >= 1300 then match_ind23=1;
if avg_sat24 >= 1300 then match_ind24=1;
if avg_sat25 >= 1300 then match_ind25=1;
if avg_sat26 >= 1300 then match_ind26=1;
if avg_sat27 >= 1300 then match_ind27=1;
if avg_sat28 >= 1300 then match_ind28=1;
if avg_sat29 >= 1300 then match_ind29=1;
if avg_sat30 >= 1300 then match_ind30=1;

*Whether each score send is to college with average SAT > 1200;
count1200 = 0;
if avg_sat01 >= 1200 then count1200 = count1200 + 1;
if avg_sat02 >= 1200 then count1200 = count1200 + 1;
if avg_sat03 >= 1200 then count1200 = count1200 + 1;
if avg_sat04 >= 1200 then count1200 = count1200 + 1;
if avg_sat05 >= 1200 then count1200 = count1200 + 1;
if avg_sat06 >= 1200 then count1200 = count1200 + 1;
if avg_sat07 >= 1200 then count1200 = count1200 + 1;
if avg_sat08 >= 1200 then count1200 = count1200 + 1;
if avg_sat09 >= 1200 then count1200 = count1200 + 1;
if avg_sat10 >= 1200 then count1200 = count1200 + 1;
if avg_sat11 >= 1200 then count1200 = count1200 + 1;
if avg_sat12 >= 1200 then count1200 = count1200 + 1;
if avg_sat13 >= 1200 then count1200 = count1200 + 1;
if avg_sat14 >= 1200 then count1200 = count1200 + 1;
if avg_sat15 >= 1200 then count1200 = count1200 + 1;
if avg_sat16 >= 1200 then count1200 = count1200 + 1;
if avg_sat17 >= 1200 then count1200 = count1200 + 1;
if avg_sat18 >= 1200 then count1200 = count1200 + 1;
if avg_sat19 >= 1200 then count1200 = count1200 + 1;
if avg_sat20 >= 1200 then count1200 = count1200 + 1;
if avg_sat21 >= 1200 then count1200 = count1200 + 1;
if avg_sat22 >= 1200 then count1200 = count1200 + 1;
if avg_sat23 >= 1200 then count1200 = count1200 + 1;
if avg_sat24 >= 1200 then count1200 = count1200 + 1;
if avg_sat25 >= 1200 then count1200 = count1200 + 1;
if avg_sat26 >= 1200 then count1200 = count1200 + 1;
if avg_sat27 >= 1200 then count1200 = count1200 + 1;
if avg_sat28 >= 1200 then count1200 = count1200 + 1;
if avg_sat29 >= 1200 then count1200 = count1200 + 1;
if avg_sat30 >= 1200 then count1200 = count1200 + 1;

*Whether each score send is to college with average SAT > 1300;
count1300 = 0;
if avg_sat01 >= 1300 then count1300 = count1300 + 1;
if avg_sat02 >= 1300 then count1300 = count1300 + 1;
if avg_sat03 >= 1300 then count1300 = count1300 + 1;
if avg_sat04 >= 1300 then count1300 = count1300 + 1;
if avg_sat05 >= 1300 then count1300 = count1300 + 1;
if avg_sat06 >= 1300 then count1300 = count1300 + 1;
if avg_sat07 >= 1300 then count1300 = count1300 + 1;
if avg_sat08 >= 1300 then count1300 = count1300 + 1;
if avg_sat09 >= 1300 then count1300 = count1300 + 1;
if avg_sat10 >= 1300 then count1300 = count1300 + 1;
if avg_sat11 >= 1300 then count1300 = count1300 + 1;
if avg_sat12 >= 1300 then count1300 = count1300 + 1;
if avg_sat13 >= 1300 then count1300 = count1300 + 1;
if avg_sat14 >= 1300 then count1300 = count1300 + 1;
if avg_sat15 >= 1300 then count1300 = count1300 + 1;
if avg_sat16 >= 1300 then count1300 = count1300 + 1;
if avg_sat17 >= 1300 then count1300 = count1300 + 1;
if avg_sat18 >= 1300 then count1300 = count1300 + 1;
if avg_sat19 >= 1300 then count1300 = count1300 + 1;
if avg_sat20 >= 1300 then count1300 = count1300 + 1;
if avg_sat21 >= 1300 then count1300 = count1300 + 1;
if avg_sat22 >= 1300 then count1300 = count1300 + 1;
if avg_sat23 >= 1300 then count1300 = count1300 + 1;
if avg_sat24 >= 1300 then count1300 = count1300 + 1;
if avg_sat25 >= 1300 then count1300 = count1300 + 1;
if avg_sat26 >= 1300 then count1300 = count1300 + 1;
if avg_sat27 >= 1300 then count1300 = count1300 + 1;
if avg_sat28 >= 1300 then count1300 = count1300 + 1;
if avg_sat29 >= 1300 then count1300 = count1300 + 1;
if avg_sat30 >= 1300 then count1300 = count1300 + 1;


*Whether each score send is to a liberal arts college with average SAT > 1200;
count1200_lib_arts = 0;
if avg_sat01 >= 1200 & liberal_arts01 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat02 >= 1200 & liberal_arts02 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat03 >= 1200 & liberal_arts03 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat04 >= 1200 & liberal_arts04 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat05 >= 1200 & liberal_arts05 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat06 >= 1200 & liberal_arts06 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat07 >= 1200 & liberal_arts07 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat08 >= 1200 & liberal_arts08 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat09 >= 1200 & liberal_arts09 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat10 >= 1200 & liberal_arts10 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat11 >= 1200 & liberal_arts11 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat12 >= 1200 & liberal_arts12 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat13 >= 1200 & liberal_arts13 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat14 >= 1200 & liberal_arts14 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat15 >= 1200 & liberal_arts15 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat16 >= 1200 & liberal_arts16 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat17 >= 1200 & liberal_arts17 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat18 >= 1200 & liberal_arts18 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat19 >= 1200 & liberal_arts19 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat20 >= 1200 & liberal_arts20 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat21 >= 1200 & liberal_arts21 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat22 >= 1200 & liberal_arts22 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat23 >= 1200 & liberal_arts23 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat24 >= 1200 & liberal_arts24 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat25 >= 1200 & liberal_arts25 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat26 >= 1200 & liberal_arts26 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat27 >= 1200 & liberal_arts27 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat28 >= 1200 & liberal_arts28 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat29 >= 1200 & liberal_arts29 = 1 then count1200_lib_arts = count1200_lib_arts + 1;
if avg_sat30 >= 1200 & liberal_arts30 = 1 then count1200_lib_arts = count1200_lib_arts + 1;

*Whether each score send is to a liberal arts college with average SAT > 1300;
count1300_lib_arts = 0;
if avg_sat01 >= 1300 & liberal_arts01 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat02 >= 1300 & liberal_arts02 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat03 >= 1300 & liberal_arts03 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat04 >= 1300 & liberal_arts04 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat05 >= 1300 & liberal_arts05 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat06 >= 1300 & liberal_arts06 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat07 >= 1300 & liberal_arts07 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat08 >= 1300 & liberal_arts08 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat09 >= 1300 & liberal_arts09 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat10 >= 1300 & liberal_arts10 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat11 >= 1300 & liberal_arts11 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat12 >= 1300 & liberal_arts12 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat13 >= 1300 & liberal_arts13 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat14 >= 1300 & liberal_arts14 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat15 >= 1300 & liberal_arts15 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat16 >= 1300 & liberal_arts16 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat17 >= 1300 & liberal_arts17 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat18 >= 1300 & liberal_arts18 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat19 >= 1300 & liberal_arts19 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat20 >= 1300 & liberal_arts20 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat21 >= 1300 & liberal_arts21 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat22 >= 1300 & liberal_arts22 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat23 >= 1300 & liberal_arts23 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat24 >= 1300 & liberal_arts24 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat25 >= 1300 & liberal_arts25 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat26 >= 1300 & liberal_arts26 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat27 >= 1300 & liberal_arts27 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat28 >= 1300 & liberal_arts28 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat29 >= 1300 & liberal_arts29 = 1 then count1300_lib_arts = count1300_lib_arts + 1;
if avg_sat30 >= 1300 & liberal_arts30 = 1 then count1300_lib_arts = count1300_lib_arts + 1;

*Whether each score send is to an instate college;
instate = 0;
if statefip = fips01 then instate = instate + 1;
if statefip = fips02 then instate = instate + 1;
if statefip = fips03 then instate = instate + 1;
if statefip = fips04 then instate = instate + 1;
if statefip = fips05 then instate = instate + 1;
if statefip = fips06 then instate = instate + 1;
if statefip = fips07 then instate = instate + 1;
if statefip = fips08 then instate = instate + 1;
if statefip = fips09 then instate = instate + 1;
if statefip = fips10 then instate = instate + 1;
if statefip = fips11 then instate = instate + 1;
if statefip = fips12 then instate = instate + 1;
if statefip = fips13 then instate = instate + 1;
if statefip = fips14 then instate = instate + 1;
if statefip = fips15 then instate = instate + 1;
if statefip = fips16 then instate = instate + 1;
if statefip = fips17 then instate = instate + 1;
if statefip = fips18 then instate = instate + 1;
if statefip = fips19 then instate = instate + 1;
if statefip = fips20 then instate = instate + 1;
if statefip = fips21 then instate = instate + 1;
if statefip = fips22 then instate = instate + 1;
if statefip = fips23 then instate = instate + 1;
if statefip = fips24 then instate = instate + 1;
if statefip = fips25 then instate = instate + 1;
if statefip = fips26 then instate = instate + 1;
if statefip = fips27 then instate = instate + 1;
if statefip = fips28 then instate = instate + 1;
if statefip = fips29 then instate = instate + 1;
if statefip = fips30 then instate = instate + 1;

*Whether each score send is to an instate flagship college;
instate_flag = 0;
if statefip = fips01 & flagship01 = 1 then instate_flag = instate_flag + 1;
if statefip = fips02 & flagship02 = 1 then instate_flag = instate_flag + 1;
if statefip = fips03 & flagship03 = 1 then instate_flag = instate_flag + 1;
if statefip = fips04 & flagship04 = 1 then instate_flag = instate_flag + 1;
if statefip = fips05 & flagship05 = 1 then instate_flag = instate_flag + 1;
if statefip = fips06 & flagship06 = 1 then instate_flag = instate_flag + 1;
if statefip = fips07 & flagship07 = 1 then instate_flag = instate_flag + 1;
if statefip = fips08 & flagship08 = 1 then instate_flag = instate_flag + 1;
if statefip = fips09 & flagship09 = 1 then instate_flag = instate_flag + 1;
if statefip = fips10 & flagship10 = 1 then instate_flag = instate_flag + 1;
if statefip = fips11 & flagship11 = 1 then instate_flag = instate_flag + 1;
if statefip = fips12 & flagship12 = 1 then instate_flag = instate_flag + 1;
if statefip = fips13 & flagship13 = 1 then instate_flag = instate_flag + 1;
if statefip = fips14 & flagship14 = 1 then instate_flag = instate_flag + 1;
if statefip = fips15 & flagship15 = 1 then instate_flag = instate_flag + 1;
if statefip = fips16 & flagship16 = 1 then instate_flag = instate_flag + 1;
if statefip = fips17 & flagship17 = 1 then instate_flag = instate_flag + 1;
if statefip = fips18 & flagship18 = 1 then instate_flag = instate_flag + 1;
if statefip = fips19 & flagship19 = 1 then instate_flag = instate_flag + 1;
if statefip = fips20 & flagship20 = 1 then instate_flag = instate_flag + 1;
if statefip = fips21 & flagship21 = 1 then instate_flag = instate_flag + 1;
if statefip = fips22 & flagship22 = 1 then instate_flag = instate_flag + 1;
if statefip = fips23 & flagship23 = 1 then instate_flag = instate_flag + 1;
if statefip = fips24 & flagship24 = 1 then instate_flag = instate_flag + 1;
if statefip = fips25 & flagship25 = 1 then instate_flag = instate_flag + 1;
if statefip = fips26 & flagship26 = 1 then instate_flag = instate_flag + 1;
if statefip = fips27 & flagship27 = 1 then instate_flag = instate_flag + 1;
if statefip = fips28 & flagship28 = 1 then instate_flag = instate_flag + 1;
if statefip = fips29 & flagship29 = 1 then instate_flag = instate_flag + 1;
if statefip = fips30 & flagship30 = 1 then instate_flag = instate_flag + 1;

*Getting count of score sends to four-year colleges;
est_apps = 0;
if unitid01 ~=. then est_apps= est_apps+1;
if unitid02 ~=. then est_apps= est_apps+1;
if unitid03 ~=. then est_apps= est_apps+1;
if unitid04 ~=. then est_apps= est_apps+1;
if unitid05 ~=. then est_apps= est_apps+1;
if unitid06 ~=. then est_apps= est_apps+1;
if unitid07 ~=. then est_apps= est_apps+1;
if unitid08 ~=. then est_apps= est_apps+1;
if unitid09 ~=. then est_apps= est_apps+1;
if unitid10 ~=. then est_apps= est_apps+1;
if unitid11 ~=. then est_apps= est_apps+1;
if unitid12 ~=. then est_apps= est_apps+1;
if unitid13 ~=. then est_apps= est_apps+1;
if unitid14 ~=. then est_apps= est_apps+1;
if unitid15 ~=. then est_apps= est_apps+1;
if unitid16 ~=. then est_apps= est_apps+1;
if unitid17 ~=. then est_apps= est_apps+1;
if unitid18 ~=. then est_apps= est_apps+1;
if unitid19 ~=. then est_apps= est_apps+1;
if unitid20 ~=. then est_apps= est_apps+1;
if unitid21 ~=. then est_apps= est_apps+1;
if unitid22 ~=. then est_apps= est_apps+1;
if unitid23 ~=. then est_apps= est_apps+1;
if unitid24 ~=. then est_apps= est_apps+1;
if unitid25 ~=. then est_apps= est_apps+1;
if unitid26 ~=. then est_apps= est_apps+1;
if unitid27 ~=. then est_apps= est_apps+1;
if unitid28 ~=. then est_apps= est_apps+1;
if unitid29 ~=. then est_apps= est_apps+1;
if unitid30 ~=. then est_apps= est_apps+1;


	*Fraction of score sends instate;
	fraction_instate = instate/est_apps;

	*Number of four-year score sends;
	four_year_apps = sum(four01,four02, four03, four04, four05, four06, four07, four08, four09, four10, four11, four12, four13, four14, four15, four16, four17, four18, four19, four20, four21, four22, four23, four24, four25, four26, four27, four28, four29, four30);
	if four_year_apps=. then four_year_apps = 0;
	
	*Whether sent score to any academic match college;
	match_max = max(match01, match02, match03, match04, match05, match06, match07, match08, match09, match10, match11, match12, match13, match14, match15, match16, match17, match18, match19, match20, match21, match22, match23, match24, match25, match26, match27, match28, match29, match30); 

	*Number of match colleges;
	num_match = sum(match_ind01, match_ind02, match_ind03, match_ind04, match_ind05, match_ind06, match_ind07, match_ind08, match_ind09, match_ind10, match_ind11, match_ind12, match_ind13, match_ind14, match_ind15, match_ind16, match_ind17, match_ind18, match_ind19, match_ind20, match_ind21, match_ind22, match_ind23, match_ind24, match_ind25, match_ind26, match_ind27, match_ind28, match_ind29, match_ind30);
	if num_match =. then num_match = 0;
	if num_match >=1 then one_match = 1; else one_match = 0;
	fraction_match = num_match/est_apps;

	*Number of score sends to public colleges;
	public_apps = sum(public01,public02, public03, public04, public05, public06, public07, public08, public09, public10, public11, public12, public13, public14, public15, public16, public17, public18, public19, public20, public21, public22, public23, public24, public25, public26, public27, public28, public29, public30);
	if public_apps =. then public_apps = 0;

	*SAT statistics of score sends portfolio;
	avg_sat = mean(avg_sat01,avg_sat02,avg_sat03,avg_sat04,avg_sat05,avg_sat06,avg_sat07,avg_sat08,avg_sat09,avg_sat10,avg_sat11,avg_sat12,avg_sat13,avg_sat14,avg_sat15,avg_sat16,avg_sat17,avg_sat18,avg_sat19,avg_sat20,avg_sat21,avg_sat22,avg_sat23,avg_sat24,avg_sat25,avg_sat26,avg_sat27,avg_sat28,avg_sat29,avg_sat30);
	min_sat = min(avg_sat01,avg_sat02,avg_sat03,avg_sat04,avg_sat05,avg_sat06,avg_sat07,avg_sat08,avg_sat09,avg_sat10,avg_sat11,avg_sat12,avg_sat13,avg_sat14,avg_sat15,avg_sat16,avg_sat17,avg_sat18,avg_sat19,avg_sat20,avg_sat21,avg_sat22,avg_sat23,avg_sat24,avg_sat25,avg_sat26,avg_sat27,avg_sat28,avg_sat29,avg_sat30);
	max_sat = max(avg_sat01,avg_sat02,avg_sat03,avg_sat04,avg_sat05,avg_sat06,avg_sat07,avg_sat08,avg_sat09,avg_sat10,avg_sat11,avg_sat12,avg_sat13,avg_sat14,avg_sat15,avg_sat16,avg_sat17,avg_sat18,avg_sat19,avg_sat20,avg_sat21,avg_sat22,avg_sat23,avg_sat24,avg_sat25,avg_sat26,avg_sat27,avg_sat28,avg_sat29,avg_sat30);
	ct_sat = n(avg_sat01,avg_sat02,avg_sat03,avg_sat04,avg_sat05,avg_sat06,avg_sat07,avg_sat08,avg_sat09,avg_sat10,avg_sat11,avg_sat12,avg_sat13,avg_sat14,avg_sat15,avg_sat16,avg_sat17,avg_sat18,avg_sat19,avg_sat20,avg_sat21,avg_sat22,avg_sat23,avg_sat24,avg_sat25,avg_sat26,avg_sat27,avg_sat28,avg_sat29,avg_sat30);
	sd_sat = std(avg_sat01,avg_sat02,avg_sat03,avg_sat04,avg_sat05,avg_sat06,avg_sat07,avg_sat08,avg_sat09,avg_sat10,avg_sat11,avg_sat12,avg_sat13,avg_sat14,avg_sat15,avg_sat16,avg_sat17,avg_sat18,avg_sat19,avg_sat20,avg_sat21,avg_sat22,avg_sat23,avg_sat24,avg_sat25,avg_sat26,avg_sat27,avg_sat28,avg_sat29,avg_sat30);

	drop unitid01--unitid30 avg_sat01--flagship30 Match01--Match_ind30 public01--public30 ;
run;



*Saving final College Board analytic dataset;
data Processed.cb_data_00_08;
	set broadband;
run;

** Comment out previous data step and uncomment out next datastep to do 1996-1998 dataset **;
/*
*Saving 1996-1998 College Board analytic dataset (for Figure 2);
data Processed.cb_data_96_96;
	set broadband;
run;
*/






****** Creating state/year SAT characteristics *******;
data states;
	set broadband;
	keep statefip year parent_educ1--parent_educ4 white black hisp other sat;

	*Parental education;
	dad_educ = .;
	if fatheduc >=1 & fatheduc <=2 then dad_educ = 1;
	if fatheduc = 3 then dad_educ = 2; 
	if fatheduc >=4 & fatheduc <=6 then dad_educ = 3; 
	if fatheduc >=7 & fatheduc <=9 then dad_educ = 4; 

	mom_educ = .;
	if motheduc >=1 & motheduc <=2 then mom_educ = 1;
	if motheduc = 3 then mom_educ = 2; 
	if motheduc >=4 & motheduc <=6 then mom_educ = 3; 
	if motheduc >=7 & motheduc <=9 then mom_educ = 4; 

	parent_highest=dad_educ;
	if mom_educ>dad_educ then parent_highest=mom_educ;
	if dad_educ =. then parent_highest=mom_educ;
	if parent_highest ~=.;

	parent_educ1 = 0;
	if parent_highest = 1 then parent_educ1 = 1;
	parent_educ2 = 0;
	if parent_highest = 2 then parent_educ2 = 1;
	parent_educ3 = 0;
	if parent_highest = 3 then parent_educ3 = 1;
	parent_educ4 = 0;
	if parent_highest = 4 then parent_educ4 = 1;

	*Race/ethnicity;
	race = 4;
	if ethdist = 7 then race = 1;
	if ethdist = 3 then race = 2;
	if ethdist >= 4 & ethdist <=6 then race = 3;
	
	white = 0;
	if race = 1 then white = 1;
	black = 0;
	if race = 2 then black = 1;
	hisp = 0;
	if race = 3 then hisp = 1;
	other = 0;
	if race = 4 then other = 1;
	
	*SAT;
	sat = SATVRECN + SATMRECN;

run;


proc sort data = states;
	by statefip year;
run;

proc means data = states noprint mean;
	by statefip year;
	output out=state_avg mean=;
run;

data state_avg;
	set state_avg;
	state_sat_ct = _freq_;
	state_sat_parent_educ1 = parent_educ1;
	state_sat_parent_educ2 = parent_educ2;
	state_sat_parent_educ3 = parent_educ3;
	state_sat_parent_educ4 = parent_educ4;
	state_sat_white = white;
	state_sat_black = black;
	state_sat_hisp = hisp;
	state_sat_other = other;
	state_sat_mean = sat;
	drop _type_ _freq_ parent_educ1--sat;
run;

proc means data = states noprint p10 p25 p50 p75 p90;
	by statefip year;
	var sat;
	output out=state_tiles p10=state_sat_p10 p25=state_sat_p25 p50=state_sat_p50 p75=state_sat_p75 p90=state_sat_p90;
run;

data state_avg;
	merge state_avg state_tiles;
	by statefip year;
	drop _type_ _freq_;
run;


data broad.state_avg;
	set state_avg;
run;
